7 Functions
cluster_kegg_max <- cluster_kegg %>%
filter(!is.na(secondary_cluster)) %>%
select(-overall_strategy) %>%
group_by(secondary_cluster) %>%
summarise(across(everything(), max, na.rm = TRUE))7.1 Trait overview
phylum_heatmap <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(cluster_taxonomy, by=join_by(phylum == Phylum)) %>%
filter(secondary_cluster %in% cluster_tree$tip.label) %>%
arrange(match(secondary_cluster, cluster_tree$tip.label)) %>%
select(secondary_cluster,phylum) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
filter(!is.na(secondary_cluster)) %>%
column_to_rownames(var = "secondary_cluster")
function_tree <- keep.tip(cluster_tree, tip=rownames(phylum_heatmap))
function_table <- cluster_kegg_max %>%
filter(secondary_cluster %in% function_tree$tip.label) %>%
column_to_rownames(var="secondary_cluster")
# Generate basal tree
function_tree <- force.ultrametric(function_tree, method="extend") %>%
ggtree(., size = 0.3) ***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
#Add phylum colors next to the tree tips
function_tree <- gheatmap(function_tree, phylum_heatmap, offset=0, width=0.1, colnames=FALSE) +
scale_fill_manual(values=phylum_colors) +
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
function_tree <- function_tree + new_scale_fill()
#Add functions heatmap
function_tree <- gheatmap(function_tree, function_table, offset=0.5, width=3.5, colnames=FALSE) +
vexpand(.08) +
coord_cartesian(clip = "off") +
scale_fill_gradient(low = "#f4f4f4", high = "steelblue", na.value="white") +
labs(fill="GIFT")
#Reset fill scale to use a different colour profile in the heatmap
function_tree <- function_tree + new_scale_fill()
function_tree
## Functional ordination
Rtsne(X=cluster_kegg_max %>% column_to_rownames(var="secondary_cluster"), dims = 2, check_duplicates = FALSE)$Y %>%
as.data.frame() %>%
mutate(secondary_cluster=cluster_kegg_max$secondary_cluster) %>%
rename(tSNE1=V1,tSNE2=V2) %>%
left_join(cluster_taxonomy,by="secondary_cluster") %>%
left_join(cluster_prevalence,by="secondary_cluster") %>%
ggplot(aes(x = tSNE1, y = tSNE2, color = Phylum, size=n_strategy))+
geom_point(shape=16, alpha=0.7) +
scale_color_manual(values=phylum_colors) +
theme_minimal() +
labs(color="Phylum", size="Number of strategies") +
guides(color = guide_legend(override.aes = list(size = 5)))7.2 Trait recovery
all_strategy_clusters <- cluster_prevalence %>%
filter(n_strategy==10) %>%
pull(secondary_cluster)
# Filter clusters recovered in all strategies
cluster_kegg_max_filt <- cluster_kegg_max %>%
filter(secondary_cluster %in% all_strategy_clusters)
cluster_kegg_proportion_max <- cluster_kegg %>%
filter(!is.na(secondary_cluster)) %>%
filter(secondary_cluster %in% all_strategy_clusters) %>%
group_by(secondary_cluster) %>%
mutate(across(where(is.numeric), ~ . / max(., na.rm = TRUE))) %>%
ungroup() %>%
rowwise() %>%
mutate(average = rowMeans(across(where(is.numeric)), na.rm = TRUE)) %>%
select(secondary_cluster,overall_strategy,average)
cluster_kegg_proportion_max %>%
group_by(overall_strategy) %>%
summarise(mean=mean(average),sd=sd(average)) %>%
tt()| overall_strategy | mean | sd |
|---|---|---|
| coassembly_cage_treatment | 0.9685454 | 0.03462967 |
| coassembly_longitudinal | 0.9618531 | 0.05465040 |
| coassembly_treatment | 0.9661473 | 0.04396223 |
| cobinning_cage_treatment | 0.9718577 | 0.03001165 |
| cobinning_longitudinal | 0.9691493 | 0.03665104 |
| cobinning_treatment | 0.9470009 | 0.08477293 |
| individual_individual_binning | 0.9640316 | 0.03520963 |
| multi_split_cage_treatment | 0.9481957 | 0.05988920 |
| multi_split_longitudinal | 0.9611543 | 0.03345183 |
| multi_split_treatment | 0.9846165 | 0.02163771 |
cluster_kegg_proportion_max %>%
left_join(cluster_taxonomy,by="secondary_cluster") %>%
ggplot(aes(x = average, y = overall_strategy, group=overall_strategy, color=Phylum)) +
geom_boxplot(color="#999999", fill="#f4f4f4", outlier.shape = NA) +
scale_color_manual(values=phylum_colors[-c(1,4,6,7)]) +
xlim(0.8, 1)+
geom_jitter(alpha=0.3) +
theme_classic() +
theme() +
labs(x="Function recovery", y="Strategy")cluster_kegg_proportion_max %>%
mutate(secondary_cluster=factor(secondary_cluster,levels=rev(cluster_tree$tip.label[cluster_tree$tip.label %in% cluster_kegg_proportion_max$secondary_cluster]))) %>%
ggplot(aes(x=secondary_cluster,y=overall_strategy,fill=average))+
geom_tile()+
scale_fill_distiller(palette = "YlGnBu", direction = -1) +
theme_minimal() +
theme(axis.text.x = element_blank(),
axis.title.y = element_blank()) +
labs(y="Strategy",x="Clusters",fill="Function recovery")